#!/usr/bin/env bash
# Launch a vLLM server for a local Qwen3-4B-Instruct-2507 checkpoint.
#
# Every setting defaults to the value that used to be hard-coded here and can
# be overridden from the environment without editing this file, e.g.:
#
#   SERVE_PORT=8000 SERVE_MODEL_PATH=/data/models/my-model bash <this-script>
#
# Overridable variables (default in parentheses):
#   SERVE_MODEL_PATH    model directory handed to `vllm serve`
#                       (/home/renchong/.cache/modelscope/hub/Qwen/Qwen3-4B-Instruct-2507)
#   SERVE_MAX_MODEL_LEN value for --max-model-len (1024)
#   SERVE_PORT          value for --port (10085)
#   SERVE_MODEL_NAME    value for --served-model-name (base_model)
#   SERVE_GPU_MEM_UTIL  value for --gpu-memory-utilization (0.8)
#
# The variables use a SERVE_ prefix rather than VLLM_ so they do not clash
# with environment variables that vLLM itself consumes.

# `: "${VAR:=default}"` assigns the default only when VAR is unset or empty.
: "${SERVE_MODEL_PATH:=/home/renchong/.cache/modelscope/hub/Qwen/Qwen3-4B-Instruct-2507}"
: "${SERVE_MAX_MODEL_LEN:=1024}"
: "${SERVE_PORT:=10085}"
: "${SERVE_MODEL_NAME:=base_model}"
: "${SERVE_GPU_MEM_UTIL:=0.8}"

# All expansions are quoted so a model path containing spaces stays one
# argument. Flags use the dashed spelling consistently.
vllm serve "${SERVE_MODEL_PATH}" \
	--max-model-len "${SERVE_MAX_MODEL_LEN}" \
	--port "${SERVE_PORT}" \
	--served-model-name "${SERVE_MODEL_NAME}" \
	--gpu-memory-utilization "${SERVE_GPU_MEM_UTIL}"